*************************************************************;
* emodule quiz analysis                                     *;
*************************************************************;

*get data for the quiz;
/* Reads the tab-delimited quiz export into WORK.one, replacing any existing
   copy.  Column names come from the first line and data start on line 2. */
PROC IMPORT OUT= WORK.one 
            DATAFILE= "C:\\quiz2022_23_dat.txt" /*change your path*/
            DBMS=TAB REPLACE;
     GETNAMES=YES;
     DATAROW=2; 
     /* Scan every row when inferring column types and lengths.  By default
        only the first 20 rows are scanned, so a longer value that first
        appears further down (for example 'Traditional' after a run of
        'Emodule') could be truncated and would then fail the equality
        tests that build rand_gp later in this program. */
     GUESSINGROWS=MAX;
RUN;

/* Numeric format rand_gp.: display labels for codes 1 through 4. */
PROC FORMAT;
   VALUE rand_gp
      1 = 'E-module/online'
      2 = 'E-module/in-person'
      3 = 'Traditional/online'
      4 = 'Traditional/in-person'
   ;
RUN;
/* WORK.two = WORK.one plus the number-correct score and a four-level
   group code. */
DATA two;
   SET one;

   /* number correct on quiz (SUM skips missing items) */
   n_correct = sum(of q1_correct q2_correct q3_correct q4_correct q5_correct
                      q6_correct q7_correct q8_correct q9_correct q10_correct);

   /* rand_gp crosses randomization with group; a row matching none of the
      four combinations keeps rand_gp missing */
   select;
      when (randomization='Emodule'     and group='online')   rand_gp = 1;
      when (randomization='Emodule'     and group='inperson') rand_gp = 2;
      when (randomization='Traditional' and group='online')   rand_gp = 3;
      when (randomization='Traditional' and group='inperson') rand_gp = 4;
      otherwise;
   end;
   FORMAT rand_gp rand_gp.;
RUN;
/* quiz 1 rows of WORK.two, with percent rescaled and a perfect-score flag */
DATA quiz1;
   SET two (where=(quiz=1));

   /* percent divided by 100 */
   proportion = percent / 100;

   /* grade2: 1 = all 10 correct, 0 = fewer than 10 correct.
      Left missing when n_correct is missing or outside 0-10. */
   if 0 <= n_correct <= 10 then grade2 = (n_correct = 10);
RUN;
/* quiz 2 rows of WORK.two: id plus the two scores copied to new names
   (percent2, n_correct2) */
DATA quiz2 (keep=id percent2 n_correct2);
   SET two (where=(quiz=2));
   percent2   = percent;
   n_correct2 = n_correct;
RUN;
/* Match quiz 1 and quiz 2 rows on id and compute the change
   (quiz 2 minus quiz 1) in percent and in number correct. */
PROC SORT DATA=quiz1; BY id; RUN;
PROC SORT DATA=quiz2; BY id; RUN;

DATA pre_post;
   MERGE quiz1 (in=has_q1)
         quiz2 (in=has_q2);
   BY id;

   /* drop ids that are not present in both quizzes */
   if not (has_q1 and has_q2) then delete;

   /* NOTE(review): rows are matched on id alone - this assumes an id is
      not reused across years. Confirm against the data. */
   diffpct     = percent2   - percent;
   diffcorrect = n_correct2 - n_correct;
RUN;

/* ---- Descriptive statistics for quiz 1 ---- */

/* percent correct by year, group and randomization (with confidence limits) */
PROC MEANS DATA=quiz1 MAXDEC=3
           N MEAN STD MEDIAN MIN MAX CLM;
   CLASS year group randomization;
   VAR percent;
RUN;

/* percent correct by year and randomization */
PROC MEANS DATA=quiz1 MAXDEC=3
           N MEAN STD MEDIAN MIN MAX;
   CLASS year randomization;
   VAR percent;
RUN;

/* number correct by year, group and randomization */
PROC MEANS DATA=quiz1 MAXDEC=1
           N MEAN STD MIN Q1 MEDIAN Q3 MAX;
   CLASS year group randomization;
   VAR n_correct;
RUN;

/* number correct by year and randomization */
PROC MEANS DATA=quiz1 MAXDEC=1
           N MEAN STD MIN Q1 MEDIAN Q3 MAX;
   CLASS year randomization;
   VAR n_correct;
RUN;



/* ---- Wilcoxon-score tests of quiz 1 number correct ---- */

/* 2022: compare the randomization groups */
proc npar1way data=quiz1 wilcoxon;
   where year=2022;
   class randomization;
   var n_correct;
run;

/* BY-group processing below needs quiz1 ordered by year.  One sort is
   enough: the remaining steps subset with WHERE, which does not depend on
   the sort order, so the second identical sort was dropped. */
proc sort data=quiz1; by year; run;

/* each year separately: compare the rand_gp groups */
proc npar1way data=quiz1 wilcoxon;
   by year;
   class rand_gp;
   var n_correct;
run;

/* 2023: compare the randomization groups */
proc npar1way data=quiz1 wilcoxon;
   where year=2023;
   class randomization;
   var n_correct;
run;

/* 2023: compare the rand_gp groups.
   NOTE(review): this repeats the year=2023 slice of the BY year step
   above - confirm whether the separate run is still wanted. */
proc npar1way data=quiz1 wilcoxon;
   where year=2023;
   class rand_gp;
   var n_correct;
run;
* pairwise comparisons for 2022;
/* pairwise_wilcoxon: two-group Wilcoxon test of quiz 1 number correct.
     yr    - value of year to analyse
     gp_a  - first rand_gp code of the pair
     gp_b  - second rand_gp code of the pair
   Replaces six copies of the same step that differed only in the pair.
   NOTE(review): the pairwise p-values are not adjusted for multiple
   comparisons. The DSCF option of PROC NPAR1WAY is one alternative -
   confirm the intended approach. */
%macro pairwise_wilcoxon(yr=, gp_a=, gp_b=);
   proc npar1way data=quiz1 wilcoxon;
      where year=&yr and rand_gp in (&gp_a, &gp_b);
      class rand_gp;
      var n_correct;
   run;
%mend pairwise_wilcoxon;

%pairwise_wilcoxon(yr=2022, gp_a=1, gp_b=2)
%pairwise_wilcoxon(yr=2022, gp_a=1, gp_b=3)
%pairwise_wilcoxon(yr=2022, gp_a=1, gp_b=4)
%pairwise_wilcoxon(yr=2022, gp_a=2, gp_b=3)
%pairwise_wilcoxon(yr=2022, gp_a=2, gp_b=4)
%pairwise_wilcoxon(yr=2022, gp_a=3, gp_b=4)

/* ---- Change in number correct (quiz 2 minus quiz 1) ---- */

/* distribution of the change over all matched ids */
PROC UNIVARIATE DATA=pre_post;
   VAR diffcorrect;
RUN;

/* change compared across the rand_gp groups */
PROC NPAR1WAY DATA=pre_post WILCOXON;
   CLASS rand_gp;
   VAR diffcorrect;
RUN;

/* change compared across years */
PROC NPAR1WAY DATA=pre_post WILCOXON;
   CLASS year;
   VAR diffcorrect;
RUN;
/*
proc freq data=quiz1;
tables n_correct grade2;
run;

proc freq data=quiz1;
   table randomization n_correct;
run;
proc freq data=quiz1;
   table year*randomization*n_correct;
run;
proc freq data=quiz1;
   table year*group*randomization;
run;
proc freq data=pre_post;
   table year*group*randomization;
run;
*/

/* ---- Linear models of quiz 1 percent correct ----
   Fixed effects only (no RANDOM or REPEATED statement); three successively
   smaller sets of interaction terms. */

/* all main effects and interactions up to the three-way term */
PROC MIXED DATA=quiz1;
   CLASS year group randomization;
   MODEL percent = year|group|randomization;
RUN;

/* main effects plus all three two-way interactions */
PROC MIXED DATA=quiz1;
   CLASS year group randomization;
   MODEL percent = year group randomization
                   year*group group*randomization year*randomization;
RUN;

/* main effects plus year*group and year*randomization, with the solution
   vector, confidence limits and Tukey-adjusted pairwise differences */
PROC MIXED DATA=quiz1 PLOTS=ALL;
   CLASS year group randomization;
   MODEL percent = year group randomization
                   year*group year*randomization
                   / SOLUTION CL;
   LSMEANS year*group         / PDIFF ADJUST=TUKEY;
   LSMEANS year*randomization / PDIFF ADJUST=TUKEY;
RUN;


/* ---- Logistic models of grade2 (10 out of 10 correct) ----
   DESCENDING reverses the response ordering; reference-cell coding is used
   for the class variables.  Same three effect sets in each step order. */

/* all main effects and interactions up to the three-way term */
PROC LOGISTIC DATA=quiz1 DESCENDING;
   CLASS year group randomization / PARAM=ref;
   MODEL grade2 = year|group|randomization;
RUN;

/* main effects plus all three two-way interactions */
PROC LOGISTIC DATA=quiz1 DESCENDING;
   CLASS year group randomization / PARAM=ref;
   MODEL grade2 = year group randomization
                  year*group group*randomization year*randomization;
RUN;

/* main effects plus year*group and year*randomization */
PROC LOGISTIC DATA=quiz1 DESCENDING;
   CLASS year group randomization / PARAM=ref;
   MODEL grade2 = year group randomization
                  year*group year*randomization;
RUN;



*question by question analysis;
/* question_chisq: for one year of WORK.two, cross-tabulates each of the ten
   question variables against randomization with chi-square tests.
     yr - value of year to analyse
   NOTE(review): WORK.two holds every quiz value, whereas most other
   analyses in this program read quiz1 - confirm that pooling quizzes here
   is intended. */
%macro question_chisq(yr);
   proc freq data=two;
      where year=&yr;
      tables (q1_correct q2_correct q3_correct q4_correct q5_correct
              q6_correct q7_correct q8_correct q9_correct q10_correct)
             *randomization / chisq;
   run;
%mend question_chisq;

%question_chisq(2022)
%question_chisq(2023)


